Load Libraries & Set Themes

library(devtools)   # Reproducibility (see end of file)
library(phyloseq)   # Easier data manipulation  
library(tidyverse)  # Pretty plotting and data manipulation 
library(forcats)    # Recoding factors
library(cowplot)    # Multiple plotting
library(d3heatmap)  # For nice heatmaps
library(phytools)   # Working with phylogenetic trees
library(ggtree)     # Pretty ggplot-esque phylogenetic trees
library(ape)        # Working with phylogenetic trees
library(DT)         # Pretty Tables 
library(gplots)     # Heatmap.2 function
source("code/set_colors.R")           # Set Colors for plotting

# Set some global ggplot parameters
mytheme <- theme(legend.text = element_text(size = 8), legend.title = element_text(size = 8, face = "bold"), 
                 plot.title = element_text(size = 10), legend.position = c(0.01, 0.9),
                 axis.title = element_text(size = 10, face = "bold"), axis.text = element_text(size = 8),
                 legend.key.width=unit(0.1,"line"), legend.key.height=unit(0.1,"line")) 

Load FCM Data

# Read in the data 
raw_data <- read.table(file="data/Chloroplasts_removed/old/nochloro_HNA_LNA.tsv", header = TRUE) %>%
  mutate(norep_filter_name = paste(substr(Sample_16S,1,4), substr(Sample_16S,6,9), sep = "")) %>%
  arrange(norep_filter_name)

# Subset out only the Muskegon and Surface samples 
muskegon_data <- raw_data %>%
  dplyr::filter(Lake == "Muskegon" & Depth == "Surface") %>%
  dplyr::select(norep_filter_name)

# Load in the productivity data
production <- read.csv(file="data/production_data.csv", header = TRUE) %>%    
  dplyr::filter(fraction == "Free" & limnion == "Surface") %>%                         # Select only rows that are free-living
  dplyr::select(names, tot_bacprod, SD_tot_bacprod) %>%         # Select relevant columns for total productivity
  mutate(tot_bacprod = round(tot_bacprod, digits = 2),          # Round to 2 decimals
         SD_tot_bacprod = round(SD_tot_bacprod, digits = 2) ) %>%      
  dplyr::rename(norep_filter_name = names) %>%                  # Rename to match other data frame
  arrange(norep_filter_name)

# Stop if the names do not match
stopifnot(muskegon_data$norep_filter_name == production$norep_filter_name)

# Combine the two muskegon data frames into one
combined_data <- left_join(muskegon_data, production, by = "norep_filter_name")

# Merge the combined data back into the original data frame (data)
data <- left_join(raw_data, combined_data, by = "norep_filter_name") %>%
  dplyr::select(-c(Platform, Fraction)) %>%
  mutate(Lake = factor(Lake, levels = c("Michigan","Inland", "Muskegon")))
head(data)
##       samples Total.cells HNA.cells LNA.cells Total.count.sd    HNA.sd    LNA.sd     Lake Sample_16S Season     Month Year  Site Depth Total_Sequences norep_filter_name tot_bacprod SD_tot_bacprod
## 1 110D2-115-2    620420.9  139880.7  480540.2       1640.275  4639.952  5795.307 Michigan  110D2F115 Winter   Januari 2015 MM110  Deep           19654          110DF115          NA             NA
## 2 110D2-415-2    799314.0  155198.1  644115.9       9388.845  4198.741  5493.442 Michigan  110D2F415 Spring     April 2015 MM110  Deep            7951          110DF415          NA             NA
## 3   110D2-515   1261369.4  362443.6  898925.9      15341.779 10504.696  6299.495 Michigan  110D2F515 Spring       May 2015 MM110  Deep           16293          110DF515          NA             NA
## 4 110D2-715-2    542723.6  131132.1  411591.5       6139.696  2196.008  3967.958 Michigan  110D2F715 Summer      July 2015 MM110  Deep           16882          110DF715          NA             NA
## 5   110D2-815    548027.9  131820.5  416207.4       3552.010  3805.294  2441.935 Michigan  110D2F815 Summer    August 2015 MM110  Deep           22157          110DF815          NA             NA
## 6 110D2-915-2    731931.0  189746.2  542184.8      36726.396  6634.230 30473.925 Michigan  110D2F915   Fall September 2015 MM110  Deep           16500          110DF915          NA             NA
# Fix the metadata
data$Month[data$Month == "Januari"] <- "June"
data$Season[data$Season == "Winter"] <- "Summer"

# Write out the file 
#write.table(data, file="data/Chloroplasts_removed/productivity_data.tsv", row.names=TRUE)

####### HELPFUL TRANSFORMED AND SUBSETTED DATA
##### Subset Muskegon Lake Data Only 
muskegon <- dplyr::filter(data, Lake == "Muskegon" & Depth == "Surface") %>%
  mutate(Site = factor(Site, levels = c("MOT", "MDP", "MBR", "MIN"))) %>%
  filter(tot_bacprod < 90)

# Gather the data for summary statistics 
df_cells <- data %>%
  dplyr::select(1:4, Lake, Season:Depth) %>%
  rename(Total = Total.cells, HNA = HNA.cells, LNA = LNA.cells) %>%
  gather(key = FCM_type, value = num_cells, Total:LNA)

Summary & Descriptive Statistics

# Number of cells in HNA and LNA across all samples 
    # For a plot of these values, see figure 1 below
df_cells %>%
  group_by(FCM_type) %>%
  summarise(num_samples = n(),
            mean_cells = round(mean(num_cells), digits = 2),
            sd_mean_cells = round(sd(num_cells), digits = 2),
            median_cells = round(median(num_cells), digits = 2)) %>%
  datatable(caption = "Mean and median number of cells per ecosystem", rownames = FALSE)
# Are there more total cells in one lake over the other?
totcells_df <- df_cells %>%
  filter(FCM_type == "Total")
# Compute the analysis of variance
totcells_aov <- aov(num_cells ~ Lake, data = totcells_df)
summary(totcells_aov) # there is a difference, but which lake?
##              Df    Sum Sq   Mean Sq F value   Pr(>F)    
## Lake          2 4.332e+14 2.166e+14   37.75 2.72e-14 ***
## Residuals   170 9.755e+14 5.738e+12                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Which samples are significant from each other?
TukeyHSD(totcells_aov) # Michigan is significantly different form Muskegon and Inland 
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = num_cells ~ Lake, data = totcells_df)
## 
## $Lake
##                        diff      lwr       upr     p adj
## Inland-Michigan   3417194.2  2411737 4422651.4 0.0000000
## Muskegon-Michigan 3030294.1  1939004 4121584.0 0.0000000
## Muskegon-Inland   -386900.1 -1489077  715276.9 0.6849527
# Number of cells in HNA and LNA per ecosystem
df_cells %>%
  group_by(Lake, FCM_type) %>%
  summarise(num_samples = n(),
            mean_cells = round(mean(num_cells), digits = 2),
            median_cells = round(median(num_cells), digits = 2)) %>%
  datatable(caption = "Mean and median number of cells per ecosystem", rownames = FALSE)
# Proportion of HNA and LNA across all samples 
df_cells %>%
  dplyr::select(samples, Lake, FCM_type, num_cells) %>%
  spread(FCM_type, num_cells) %>%
  mutate(prop_HNA = HNA/Total * 100,
         prop_LNA = LNA/Total * 100) %>%
  dplyr::select(Lake, prop_HNA, prop_LNA) %>%
  summarize(mean_HNA = round(mean(prop_HNA), digits = 2), 
            sd_HNA = round(sd(prop_HNA), digits = 2),
            mean_LNA = round(mean(prop_LNA), digits = 2),
            sd_LNA = round(sd(prop_LNA), digits = 2))
##   mean_HNA sd_HNA mean_LNA sd_LNA
## 1    30.41   9.06    69.59   9.06
# Proportion of HNA and LNAper ecosystem
prop_stats <- df_cells %>%
  dplyr::select(samples, Lake, FCM_type, num_cells) %>%
  spread(FCM_type, num_cells) %>%
  mutate(prop_HNA = HNA/Total * 100,
         prop_LNA = LNA/Total * 100) %>%
  dplyr::select(Lake, prop_HNA, prop_LNA) %>%
  rename(HNA = prop_HNA, LNA = prop_LNA) %>%
  group_by(Lake) %>%
  summarize(mean_HNA = round(mean(HNA), digits = 2),
            mean_LNA = round(mean(LNA), digits = 2),
            min_HNA = round(min(HNA), digits = 2), 
            max_HNA = round(max(HNA), digits = 2), 
            min_LNA = round(min(LNA), digits = 2), 
            max_LNA = round(max(LNA), digits = 2), 
            num_samples = n())

datatable(prop_stats, caption = "Statistics of the percentage of each flow cytometry group across the systems", rownames = FALSE)

Figure 1

# Load in the data from each lake 
musk_all_df <- read.table(file="data/Chloroplasts_removed/ByLake_Filtering/5in10/muskegon/muskegon_sampledata_5in10.tsv", header = TRUE) %>%
  mutate(norep_filter_name = paste(substr(Sample_16S,1,4), substr(Sample_16S,6,9), sep = "")) %>%
  arrange(norep_filter_name)

mich_all_df <- read.table(file="data/Chloroplasts_removed/ByLake_Filtering/5in10/michigan/michigan_sampledata_5in10.tsv", header = TRUE) %>%
  mutate(norep_filter_name = paste(substr(Sample_16S,1,4), substr(Sample_16S,6,9), sep = "")) %>%
  arrange(norep_filter_name)

inla_all_df <- read.table(file="data/Chloroplasts_removed/ByLake_Filtering/5in10/inland/inland_sampledata_5in10.tsv", header = TRUE) %>%
  mutate(norep_filter_name = paste(substr(Sample_16S,1,4), substr(Sample_16S,6,9), sep = "")) %>%
  arrange(norep_filter_name)

stopifnot(colnames(musk_all_df) == colnames(mich_all_df))
stopifnot(colnames(musk_all_df) == colnames(inla_all_df))

lakes <- bind_rows(musk_all_df, mich_all_df, inla_all_df)

p1 <- ggplot(df_cells, aes(x = FCM_type, y = num_cells, fill = Lake, color = Lake, shape = Lake)) + 
  geom_point(size = 1.5, position = position_jitterdodge(), color = "black") + 
  geom_boxplot(alpha = 0.7, outlier.shape = NA, show.legend = FALSE, color = "black") +
  scale_color_manual(values = lake_colors,  guide = "none") +
  scale_fill_manual(values = lake_colors) +
  scale_shape_manual(values = lake_shapes) +
  labs(y = "Number of Cells (cells/mL)", x = "FCM Type") +
  mytheme + theme(legend.title = element_blank(), legend.position = c(0.01, 0.95)) +
  guides(colour = guide_legend(override.aes = list(size=2.5)),
         shape = guide_legend(override.aes = list(size=2.5)),
         fill = guide_legend(override.aes = list(size=2.5)))

## Is there a corrlation between HNA and LNA across ecosystems?
# 1. Run the linear model 
lm_allNA_corr <- lm(LNA.cells ~ HNA.cells, data = lakes)
summary(lm(LNA.cells ~ HNA.cells * Lake, data = lakes))
## 
## Call:
## lm(formula = LNA.cells ~ HNA.cells * Lake, data = lakes)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -3651006  -558525  -113725   511977  3997810 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             3.699e+06  3.590e+05  10.304  < 2e-16 ***
## HNA.cells               3.827e-01  1.704e-01   2.246    0.026 *  
## LakeMichigan           -2.995e+06  4.523e+05  -6.622 4.63e-10 ***
## LakeMuskegon           -2.363e+06  5.411e+05  -4.367 2.21e-05 ***
## HNA.cells:LakeMichigan  5.404e-01  4.257e-01   1.269    0.206    
## HNA.cells:LakeMuskegon  1.027e+00  2.549e-01   4.029 8.50e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1301000 on 167 degrees of freedom
## Multiple R-squared:  0.6116, Adjusted R-squared:    0.6 
## F-statistic:  52.6 on 5 and 167 DF,  p-value: < 2.2e-16
## 2. Extract the R2 and p-value from the linear model: 
lm_allNA_corr_stats <- paste("atop(R^2 ==", round(summary(lm_allNA_corr)$adj.r.squared, digits = 2), ",",
                          "p ==", round(unname(summary(lm_allNA_corr)$coefficients[,4][2]), digits = 24), ")")

# 3. Plot it
p2 <- ggplot(lakes, aes(x = HNA.cells, y = LNA.cells)) + 
  geom_errorbarh(aes(xmin = HNA.cells - HNA.sd, xmax = HNA.cells + HNA.sd), color = "grey", alpha = 0.8) + 
  geom_errorbar(aes(ymin = LNA.cells - LNA.sd, max = LNA.cells + LNA.sd), color = "grey", alpha = 0.8) + 
  geom_point(size = 2.5, alpha = 0.9, aes(fill = Lake, shape = Lake)) + 
  scale_fill_manual(values = lake_colors) +
  scale_shape_manual(values = lake_shapes) +
  geom_smooth(method = "lm", color = "black") + 
  labs(y = "LNA Cell Density (cells/mL)", x = "HNA Cell Density (cells/mL)") +
  scale_x_continuous(labels = function(x) format(x, scientific = TRUE), limits = c(0, 6.1e+06)) +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE), limits = c(0, 1e+07)) +
  annotate("text", x=5e+06, y=0.8e+06, label=lm_allNA_corr_stats, parse = TRUE, color = "black", size = 3) +
  mytheme + theme(legend.position = "none") #theme(legend.title = element_blank(), legend.position = c(0.01, 0.95))

# Figure 1
plot_grid(p1, p2, ncol = 2, nrow =1, labels = c("A","B"), rel_widths = c(0.95, 1))

Figure 1B: Plotted By Lake System

### MUSKEGON ONLY ANALYSIS
# 1. Run the linear model 
lm_muskNA_corr <- lm(LNA.cells ~ HNA.cells, data = musk_all_df)

## 2. Extract the R2 and p-value from the linear model: 
lm_muskNA_corr_stats <- paste("atop(R^2 ==", round(summary(lm_muskNA_corr)$adj.r.squared, digits = 2), ",",
                             "p ==", round(unname(summary(lm_muskNA_corr)$coefficients[,4][2]), digits = 9), ")")

musk_corr_plot <- ggplot(musk_all_df, aes(x = HNA.cells, y = LNA.cells)) + 
  geom_errorbarh(aes(xmin = HNA.cells - HNA.sd, xmax = HNA.cells + HNA.sd), color = "grey") + 
  geom_errorbar(aes(ymin = LNA.cells - LNA.sd, max = LNA.cells + LNA.sd), color = "grey") + 
  geom_point(size = 3, shape = 22, aes(fill = Lake)) + 
  geom_smooth(method = "lm", color = "black") + 
  ggtitle("Muskegon Lake") + scale_fill_manual(values = lake_colors) +
  scale_x_continuous(labels = function(x) format(x, scientific = TRUE), limits = c(0, 6.1e+06)) +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE), limits = c(0, 1e+07)) +
  labs(y = "LNA Cell Density (cells/mL)", x = "HNA Cell Density (cells/mL)") +
  annotate("text", x=5e+06, y=0.8e+06, label=lm_muskNA_corr_stats, parse = TRUE, color = "black", size = 3) +
  mytheme

### INLAND ONLY ANALYSIS
# 1. Run the linear model 
lm_inlaNA_corr <- lm(LNA.cells ~ HNA.cells, data = filter(inla_all_df, Sample_16S != "Z14003F"))

## 2. Extract the R2 and p-value from the linear model: 
lm_inlaNA_corr_stats <- paste("atop(R^2 ==", round(summary(lm_inlaNA_corr)$adj.r.squared, digits = 2), ",",
                              "p ==", round(unname(summary(lm_inlaNA_corr)$coefficients[,4][2]), digits = 2), ")")

inla_corr_plot <- ggplot(filter(inla_all_df, Sample_16S != "Z14003F"), aes(x = HNA.cells, y = LNA.cells)) + 
  geom_errorbarh(aes(xmin = HNA.cells - HNA.sd, xmax = HNA.cells + HNA.sd), color = "grey") + 
  geom_errorbar(aes(ymin = LNA.cells - LNA.sd, max = LNA.cells + LNA.sd), color = "grey") + 
  geom_point(size = 3, shape = 22, aes(fill = Lake)) + 
  geom_smooth(method = "lm", color = "black") + 
  scale_x_continuous(labels = function(x) format(x, scientific = TRUE), limits = c(0, 6.1e+06)) +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE), limits = c(0, 1e+07)) +
  ggtitle("Inland Lakes") + scale_fill_manual(values = lake_colors) +
  labs(y = "LNA Cell Density (cells/mL)", x = "HNA Cell Density (cells/mL)") +
  annotate("text", x=5e+06, y=0.8e+06, label=lm_inlaNA_corr_stats, parse = TRUE, color = "black", size = 3) +
  mytheme

### MICHIGAN ONLY ANALYSIS
# 1. Run the linear model 
lm_michNA_corr <- lm(LNA.cells ~ HNA.cells, data = mich_all_df)

# Without the Lake Michigan outlier!
summary(lm(LNA.cells ~ HNA.cells, data = filter(mich_all_df, Sample_16S != "M15S2F515")))
## 
## Call:
## lm(formula = LNA.cells ~ HNA.cells, data = filter(mich_all_df, 
##     Sample_16S != "M15S2F515"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -599787 -239620  -95094  241831  987737 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 5.832e+05  9.793e+04   5.956 3.37e-07 ***
## HNA.cells   1.200e+00  1.797e-01   6.678 2.78e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 354600 on 46 degrees of freedom
## Multiple R-squared:  0.4922, Adjusted R-squared:  0.4812 
## F-statistic: 44.59 on 1 and 46 DF,  p-value: 2.778e-08
## 2. Extract the R2 and p-value from the linear model: 
lm_michNA_corr_stats <- paste("atop(R^2 ==", round(summary(lm_michNA_corr)$adj.r.squared, digits = 2), ",",
                              "p ==", round(unname(summary(lm_michNA_corr)$coefficients[,4][2]), digits = 11), ")")

mich_corr_plot <- ggplot(mich_all_df, aes(x = HNA.cells, y = LNA.cells)) + 
  geom_errorbarh(aes(xmin = HNA.cells - HNA.sd, xmax = HNA.cells + HNA.sd), color = "grey") + 
  geom_errorbar(aes(ymin = LNA.cells - LNA.sd, max = LNA.cells + LNA.sd), color = "grey") + 
  geom_point(size = 3, shape = 22, aes(fill = Lake)) + 
  geom_smooth(method = "lm", color = "black") + 
  scale_x_continuous(labels = function(x) format(x, scientific = TRUE), limits = c(0, 6.1e+06)) +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE), limits = c(0, 1e+07)) +
  ggtitle("Lake Michigan") + scale_fill_manual(values = lake_colors) +
  labs(y = "LNA Cell Density (cells/mL)", x = "HNA Cell Density (cells/mL)") +
  annotate("text", x=5e+06, y=0.8e+06, label=lm_michNA_corr_stats, parse = TRUE, color = "black", size = 3) +
  mytheme

plot_grid(mich_corr_plot, inla_corr_plot, musk_corr_plot, 
          labels = c("A", "B", "C"), nrow = 1, ncol = 3)

Figure 2: FCM vs Productivity

########################  Analysis of HNA/LNA/Total Cells vs Total Productivity
# 1. Run the linear model 
lm_HNA <- lm(tot_bacprod ~ HNA.cells, data = muskegon)

## 2. Extract the R2 and p-value from the linear model: 
lm_HNA_stats <- paste("atop(R^2 ==", round(summary(lm_HNA)$adj.r.squared, digits = 2), ",",
                                    "p ==", round(unname(summary(lm_HNA)$coefficients[,4][2]), digits = 5), ")")


# 3. Plot it
HNA_vs_prod <- ggplot(muskegon, aes(x = HNA.cells, y = tot_bacprod)) + 
  geom_errorbarh(aes(xmin = HNA.cells - HNA.sd, xmax = HNA.cells + HNA.sd), color = "grey", alpha = 0.8) + 
  geom_errorbar(aes(ymin = tot_bacprod - SD_tot_bacprod, max = tot_bacprod + SD_tot_bacprod), color = "grey", alpha = 0.8) + 
  geom_point(size = 2, shape = 22, fill = "deepskyblue4") + 
  geom_smooth(method = "lm", color = "deepskyblue4") + 
  labs(y = "Total Bacterial Production \n (μg C/L/day)", x = "HNA Cell Density \n(cells/mL)") +
  scale_x_continuous(labels = function(x) format(x, scientific = TRUE), 
                     breaks = c(2e+06, 3e+06)) +
  annotate("text", x=1.65e+06, y=60, label=lm_HNA_stats, parse = TRUE, color = "black", size = 3) +
  mytheme

# 1. Run the linear model 
lm_LNA <- lm(tot_bacprod ~ LNA.cells, data = muskegon)

## 2. Extract the R2 and p-value from the linear model: 
lm_LNA_stats <- paste("atop(R^2 ==", round(summary(lm_LNA)$adj.r.squared, digits = 3), ",",
                      "p ==", round(unname(summary(lm_LNA)$coefficients[,4][2]), digits = 2), ")")

# 3. Plot it
LNA_vs_prod <- ggplot(muskegon, aes(x = LNA.cells, y = tot_bacprod)) + 
  geom_errorbarh(aes(xmin = LNA.cells - LNA.sd, xmax = LNA.cells + LNA.sd), color = "grey", alpha = 0.8) + 
  geom_errorbar(aes(ymin = tot_bacprod - SD_tot_bacprod, max = tot_bacprod + SD_tot_bacprod), color = "grey", alpha = 0.8) + 
  geom_point(size = 2.5, shape = 22, fill = "darkgoldenrod1") + 
  labs(y = "Total Bacterial Production \n (μg C/L/day)", x = "LNA Cell Density \n(cells/mL)") +
  geom_smooth(method = "lm", se = FALSE, linetype = "longdash", color = "darkgoldenrod1") + 
  annotate("text", x=2.75e+06, y=60, label=lm_LNA_stats, parse = TRUE, color = "red", size = 3) +
  mytheme

# 1. Run the linear model 
lm_total <- lm(tot_bacprod ~ Total.cells, data = muskegon)

## 2. Extract the R2 and p-value from the linear model: 
lm_total_stats <- paste("atop(R^2 ==", round(summary(lm_total)$adj.r.squared, digits = 2), ",",
                      "p ==", round(unname(summary(lm_total)$coefficients[,4][2]), digits = 2), ")")

# 3. Plot it 
Total_vs_prod <- ggplot(muskegon, aes(x = Total.cells, y = tot_bacprod)) + 
  geom_errorbarh(aes(xmin = Total.cells - Total.count.sd, xmax = Total.cells + Total.count.sd), color = "grey", alpha = 0.8) + 
  geom_errorbar(aes(ymin = tot_bacprod - SD_tot_bacprod, max = tot_bacprod + SD_tot_bacprod), color = "grey", alpha = 0.8) + 
  scale_shape_manual(values = lake_shapes) +
  geom_point(size = 2.5, shape = 22, fill = "black") + 
  labs(y = "Total Bacterial Production \n (μg C/L/day)", x = "Total Cell Density \n(cells/mL)") +
  geom_smooth(method = "lm", color = "black") + 
  #geom_smooth(method = "lm", se = FALSE, linetype = "longdash", color = "red") + 
  annotate("text", x=5.25e+06, y=60, label=lm_total_stats, parse = TRUE, color = "black", size = 3) +
  mytheme


#### ONLY THE THREE PLOTS 
# Put all three plots together into one 
plot_grid(HNA_vs_prod + theme(legend.position = "none"), 
          LNA_vs_prod + theme(axis.title.y = element_blank(), legend.position = "none"), 
          Total_vs_prod + theme(axis.title.y = element_blank(), legend.position = "none"),
          labels = c("A", "B", "C"), 
          ncol = 3, rel_widths = c(0.95, 0.8, 0.8))

Fraction HNA vs Productivity

## Plot the fraction of HNA
fmusk <- muskegon %>% 
  mutate(fHNA = HNA.cells/Total.cells,
         fLNA = LNA.cells/Total.cells)

lm_fHNA <- lm(tot_bacprod ~ fHNA, data = fmusk)

## 2. Extract the R2 and p-value from the linear model: 
lm_fHNA_stats <- paste("atop(R^2 ==", round(summary(lm_fHNA)$adj.r.squared, digits = 2), ",",
                      "p ==", round(unname(summary(lm_fHNA)$coefficients[,4][2]), digits = 3), ")")


fHNA_vs_prod <- ggplot(fmusk, aes(x = fHNA, y = tot_bacprod)) + 
  geom_errorbar(aes(ymin = tot_bacprod - SD_tot_bacprod, max = tot_bacprod + SD_tot_bacprod), color = "grey") + 
  geom_point(size = 3, aes(shape = Lake), fill = "deepskyblue4") + 
  geom_smooth(method = "lm", linetype = "longdash", color = "deepskyblue4") + 
  scale_x_continuous(limits = c(0.15, 0.9), breaks = seq(0.2, 0.9, by = 0.2)) + 
  ylab("Bacterial Production") + xlab("Fraction HNA") +
  scale_shape_manual(values = lake_shapes) + 
  annotate("text", x= 0.22, y=60, label=lm_fHNA_stats, parse = TRUE, color = "black", size = 3) +
  mytheme + theme(legend.position = "none")


### LNA Fraction
## Plot the fraction of HNA
lm_fLNA <- lm(tot_bacprod ~ fLNA, data = fmusk)

## 2. Extract the R2 and p-value from the linear model: 
lm_fLNA_stats <- paste("atop(R^2 ==", round(summary(lm_fLNA)$adj.r.squared, digits = 2), ",",
                      "p ==", round(unname(summary(lm_fLNA)$coefficients[,4][2]), digits = 3), ")")


fLNA_vs_prod <- ggplot(fmusk, aes(x = fLNA, y = tot_bacprod)) + 
  geom_errorbar(aes(ymin = tot_bacprod - SD_tot_bacprod, max = tot_bacprod + SD_tot_bacprod), color = "grey") + 
  geom_point(size = 3, aes(shape = Lake), fill = "darkgoldenrod1") + 
  geom_smooth(method = "lm", color = "darkgoldenrod1", fill = "darkgoldenrod1",  linetype = "longdash") + 
  scale_x_continuous(limits = c(0.15, 0.9), breaks = seq(0.2, 0.9, by = 0.2)) + 
  ylab("Bacterial Production") + xlab("Fraction LNA") +
  scale_shape_manual(values = lake_shapes) + 
  annotate("text", x= 0.22, y=60, label=lm_fLNA_stats, parse = TRUE, color = "black", size = 3) +
  mytheme + theme(legend.position = "none")

plot_grid(HNA_vs_prod + theme(legend.position = "none"), 
          fHNA_vs_prod,
          LNA_vs_prod, fLNA_vs_prod,
          labels = c("A", "B", "C", "D"), ncol = 2, nrow = 2,
          align = "h") 

Alternative Figure 1:

first_row <- plot_grid(HNA_vs_prod + theme(legend.position = "none"), 
          LNA_vs_prod + theme(axis.title.y = element_blank(), legend.position = "none"), 
          Total_vs_prod + theme(axis.title.y = element_blank(), legend.position = "none"),
          labels = c("A", "B", "C"), 
          ncol = 3, nrow = 1, 
          rel_widths = c(0.95, 0.8, 0.8))

plot_for_legend <- 
  ggplot(df_cells, aes(x = Lake, y = num_cells, fill = FCM_type, color = FCM_type)) + 
  geom_point(size = 1, shape = 22, position = position_jitterdodge()) + 
  scale_fill_manual(values = fcm_colors) + 
  scale_color_manual(values = fcm_colors, guide = "none") + 
  scale_shape_manual(values = 22) +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE)) +
  labs(y = "Number of Cells \n (cells/mL)", x = "Lake") +
  theme(legend.position = "right",
           legend.title = element_text(size = 12, face = "bold"), 
           legend.text = element_text(size = 12),
           legend.key.width=unit(1,"line"), 
           legend.key.height=unit(1,"line")) +
  guides(fill = guide_legend(override.aes = list(size=3.5)))

legend1 <- get_legend(plot_for_legend)

legend2 <- get_legend(p2 + theme(legend.position = "right",
           legend.title = element_text(size = 12, face = "bold"), 
           legend.text = element_text(size = 12),
           legend.key.width=unit(1,"line"), 
           legend.key.height=unit(1,"line")) +
  guides(fill = guide_legend(override.aes = list(size=3.5))))


leg_positions <- plot_grid(legend1, legend2, nrow = 2, ncol = 1)

second_row <- plot_grid(NULL, p2, leg_positions, 
                        labels = c("", "D", ""),
                        ncol = 3, nrow = 1,
                        rel_widths = c(0.5, 1, 0.5))

## PLOT THE FIGURE 
plot_grid(first_row, second_row,
          nrow = 2, ncol = 1)

# REPOSITION PLOTS FOR NEW ORDER OF TEXT

new_leg_positions <- plot_grid(legend2, legend1, nrow = 2, ncol = 1)


swap_row1 <- plot_grid(NULL, p2, new_leg_positions, 
                        labels = c("", "A", ""),
                        ncol = 3, nrow = 1,
                        rel_widths = c(0.5, 1, 0.5))

swap_row2 <- plot_grid(HNA_vs_prod + theme(legend.position = "none"), 
          LNA_vs_prod + theme(axis.title.y = element_blank(), legend.position = "none"), 
          Total_vs_prod + theme(axis.title.y = element_blank(), legend.position = "none"),
          labels = c("B", "C", "D"), 
          ncol = 3, nrow = 1, 
          rel_widths = c(0.95, 0.8, 0.8))

plot_grid(swap_row1, swap_row2,
          nrow = 2, ncol = 1)

Figure 4: Heatmap

Load RL Ranking data

# Read in Data
dfHNA_musk <- read.csv("Final/FS_Scores/Muskegon_fs_scores_HNA_5seq10.csv") %>%
  dplyr::filter(RL.score > 0.09) %>%
  mutate(RL.ranking = 1/RL.ranking, Lake = "Muskegon")%>%
  rename(OTU = X, RL.ranking.HNA = RL.ranking) %>%
  dplyr::select(OTU, RL.ranking.HNA)

dfLNA_musk <- read.csv("Final/FS_Scores/Muskegon_fs_scores_LNA_5seq10.csv") %>%
  dplyr::filter(RL.score > 0.09) %>%
  mutate(RL.ranking = -1/RL.ranking, Lake = "Muskegon") %>%
  rename(OTU = X, RL.ranking.LNA = RL.ranking) %>%
  dplyr::select(OTU, RL.ranking.LNA)

dfHNA_inland <- read.csv("Final/FS_Scores/Inland_fs_scores_HNA_5seq10.csv") %>%
  dplyr::filter(RL.score > 0.13) %>%
  mutate(RL.ranking = 1/RL.ranking, Lake = "Inland")%>%
  rename(OTU = X, RL.ranking.HNA = RL.ranking) %>%
  dplyr::select(OTU, RL.ranking.HNA)

dfLNA_inland <- read.csv("Final/FS_Scores/Inland_fs_scores_LNA_5seq10.csv") %>%
  dplyr::filter(RL.score > 0.108) %>%
  mutate(RL.ranking = -1/RL.ranking, Lake = "Inland") %>%
  rename(OTU = X, RL.ranking.LNA = RL.ranking) %>%
  dplyr::select(OTU, RL.ranking.LNA)

dfHNA_michigan <- read.csv("Final/FS_Scores/Michigan_fs_scores_HNA_5seq10.csv") %>%
  dplyr::filter(RL.score > 0.248) %>%
  mutate(RL.ranking = 1/RL.ranking, Lake = "Michigan")%>%
  rename(OTU = X, RL.ranking.HNA = RL.ranking) %>%
  dplyr::select(OTU, RL.ranking.HNA)

dfLNA_michigan <- read.csv("Final/FS_Scores/Michigan_fs_scores_LNA_5seq10.csv") %>%
  dplyr::filter(RL.score > 0.286) %>%
  mutate(RL.ranking = -1/RL.ranking, Lake = "Michigan") %>%
  rename(OTU = X, RL.ranking.LNA = RL.ranking) %>%
  dplyr::select(OTU, RL.ranking.LNA)

# Combine HNA and LNA datasets together
muskegon_df <- full_join(dfHNA_musk, dfLNA_musk, by = "OTU") %>%   mutate(Lake = "Muskegon")
inland_df <- full_join(dfHNA_inland, dfLNA_inland, by = "OTU") %>%  mutate(Lake = "Inland")
michigan_df <- full_join(dfHNA_michigan, dfLNA_michigan, by = "OTU") %>%  mutate(Lake = "Michigan")

# Combine all of the three lakes together into one dataframe! 
lake_dfscores <-
  bind_rows(muskegon_df, inland_df, michigan_df) %>%
  rename(HNA = RL.ranking.HNA, LNA = RL.ranking.LNA) %>%
  gather(key = FCM_type, value = RL.ranking, HNA:LNA)

Plot figure 4: 1/RL Ranking

# Reformat data to be matrix style for heatmapping
# HNA/LNA: Inland: 0.13/0.108,  Michigan: 0.248/0.286,  Muskegon: 0.09/0.09
musk_dat <- 
  muskegon_df %>%
  dplyr::select(-Lake) %>%
  dplyr::filter(RL.ranking.HNA > 0.09 | RL.ranking.LNA < -0.09) %>%  # THIS IS A MISTAKE
  rename(Muskegon_HNA = RL.ranking.HNA, 
         Muskegon_LNA = RL.ranking.LNA)%>%
  mutate(Muskegon_LNA = Muskegon_LNA * -1) 

inland_dat <- 
  inland_df %>%
  dplyr::select(-Lake) %>%
  dplyr::filter(RL.ranking.HNA > 0.13 | RL.ranking.LNA < -0.108) %>%  # THIS IS A MISTAKE
  rename(Inland_HNA = RL.ranking.HNA, 
         Inland_LNA = RL.ranking.LNA) %>%
  mutate(Inland_LNA = Inland_LNA * -1)

michigan_dat <- 
  michigan_df %>%
  dplyr::select(-Lake) %>%
  dplyr::filter(RL.ranking.HNA > 0.248 | RL.ranking.LNA < -0.286) %>%  # THIS IS A MISTAKE
  rename(Michigan_HNA = RL.ranking.HNA, 
         Michigan_LNA = RL.ranking.LNA) %>%
  mutate(Michigan_LNA = Michigan_LNA * -1)

matrix_scores <- 
  full_join(musk_dat, inland_dat, by = "OTU") %>%
  full_join(michigan_dat, by = "OTU") %>%
  tibble::column_to_rownames(var = "OTU") %>%
  as.matrix()

matrix_scores[is.na(matrix_scores)] <- 0

breakers <- seq(min(matrix_scores, na.rm = T), max(matrix_scores, na.rm = T), length.out = 21)
my_palette <- colorRampPalette(c("white","gray65", "red")) (n=20)

colz <- c("#FF933F", "#FF933F", "#EC4863", "#EC4863", "#5C2849", "#5C2849")

heatmap.2(matrix_scores, 
          distfun = function(x) dist(x, method = "euclidean"),
          hclustfun = function(x) hclust(x,method = "complete"),
          col = my_palette, breaks=breakers, trace="none",
          key=TRUE, symkey=FALSE, density.info="none", srtCol=30,
          margins=c(5.5,7), cexRow=1.25,cexCol=1.25,
          key.xlab="1/RL Score",
          ColSideColors=colz)

Load RL Score data

# Read in Data
dfHNA_musk_scores <- read.csv("Final/FS_Scores/Muskegon_fs_scores_HNA_5seq10.csv") %>%
  dplyr::filter(RL.score > 0.09) %>%
  mutate(Lake = "Muskegon")%>%
  rename(OTU = X, RL.score.HNA = RL.score) %>%
  dplyr::select(OTU, RL.score.HNA)

dfLNA_musk_scores <- read.csv("Final/FS_Scores/Muskegon_fs_scores_LNA_5seq10.csv") %>%
  dplyr::filter(RL.score > 0.09) %>%
  mutate(Lake = "Muskegon") %>%
  rename(OTU = X, RL.score.LNA = RL.score) %>%
  dplyr::select(OTU, RL.score.LNA)

dfHNA_inland_scores <- read.csv("Final/FS_Scores/Inland_fs_scores_HNA_5seq10.csv") %>%
  dplyr::filter(RL.score > 0.13) %>%
  mutate(Lake = "Inland")%>%
  rename(OTU = X, RL.score.HNA = RL.score) %>%
  dplyr::select(OTU, RL.score.HNA)

dfLNA_inland_scores <- read.csv("Final/FS_Scores/Inland_fs_scores_LNA_5seq10.csv") %>%
  dplyr::filter(RL.score > 0.108) %>%
  mutate(Lake = "Inland") %>%
  rename(OTU = X, RL.score.LNA = RL.score) %>%
  dplyr::select(OTU, RL.score.LNA)

dfHNA_michigan_scores <- read.csv("Final/FS_Scores/Michigan_fs_scores_HNA_5seq10.csv") %>%
  dplyr::filter(RL.score > 0.248) %>%
  mutate(Lake = "Michigan")%>%
  rename(OTU = X, RL.score.HNA = RL.score) %>%
  dplyr::select(OTU, RL.score.HNA)

dfLNA_michigan_scores <- read.csv("Final/FS_Scores/Michigan_fs_scores_LNA_5seq10.csv") %>%
  dplyr::filter(RL.score > 0.286) %>%
  mutate(Lake = "Michigan") %>%
  rename(OTU = X, RL.score.LNA = RL.score) %>%
  dplyr::select(OTU, RL.score.LNA)

# Combine HNA and LNA datasets together
muskegon_df_scores <- full_join(dfHNA_musk_scores, dfLNA_musk_scores, by = "OTU") %>%   mutate(Lake = "Muskegon")
inland_df_scores <- full_join(dfHNA_inland_scores, dfLNA_inland_scores, by = "OTU") %>%  mutate(Lake = "Inland")
michigan_df_scores <- full_join(dfHNA_michigan_scores, dfLNA_michigan_scores, by = "OTU") %>%  mutate(Lake = "Michigan")

# Combine all of the three lakes together into one dataframe! 
scores_RL_df <-
  bind_rows(muskegon_df_scores, inland_df_scores, michigan_df_scores) %>%
  rename(HNA = RL.score.HNA, LNA = RL.score.LNA) %>%
  gather(key = FCM_type, value = RL.ranking, HNA:LNA)

Plot figure 4: 1/RL Ranking

# Reformat data to be matrix style for heatmapping
# HNA/LNA: Inland: 0.13/0.108,  Michigan: 0.248/0.286,  Muskegon: 0.09/0.09
musk_dat_scores <- 
  muskegon_df_scores %>%
  dplyr::select(-Lake) %>%
  dplyr::filter(RL.score.HNA > 0.09 | RL.score.LNA < 0.09) %>%
  rename(Muskegon_HNA = RL.score.HNA, 
         Muskegon_LNA = RL.score.LNA)

inland_dat_scores <- 
  inland_df_scores %>%
  dplyr::select(-Lake) %>%
  dplyr::filter(RL.score.HNA > 0.13 | RL.score.LNA < 0.108) %>%
  rename(Inland_HNA = RL.score.HNA, 
         Inland_LNA = RL.score.LNA) 

michigan_dat_scores <- 
  michigan_df_scores %>%
  dplyr::select(-Lake) %>%
  dplyr::filter(RL.score.HNA > 0.248 | RL.score.LNA < 0.286) %>%
  rename(Michigan_HNA = RL.score.HNA, 
         Michigan_LNA = RL.score.LNA) 

matrix_RLscores <- 
  full_join(musk_dat_scores, inland_dat_scores, by = "OTU") %>%
  full_join(michigan_dat_scores, by = "OTU") %>%
  tibble::column_to_rownames(var = "OTU") %>%
  as.matrix()

matrix_RLscores[is.na(matrix_RLscores)] <- 0

breakers <- seq(min(matrix_RLscores, na.rm = T), max(matrix_RLscores, na.rm = T), length.out = 21)
my_palette <- colorRampPalette(c("white","gray65", "red")) (n=20)

# Set the colors of the different columns going in
colz <- c("#FF933F", "#FF933F", "#EC4863", "#EC4863", "#5C2849", "#5C2849")

heatmap.2(matrix_RLscores, 
          distfun = function(x) dist(x, method = "euclidean"),
          hclustfun = function(x) hclust(x,method = "complete"),
          col = my_palette, breaks = breakers, trace = "none",
          key = TRUE, symkey = FALSE, density.info = "none", srtCol = 30,
          margins=c(5.5,7), cexRow = 1.25,cexCol = 1.25,
          key.xlab = "RL Score",ColSideColors = colz,
          lhei = c(1,9))

Figure 5: Phylogenetic Tree

#### PHYLOGENETIC ANALYSIS
load("data/phyloseq.RData")
physeq.otu
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 13370 taxa and 859 samples ]
## tax_table()   Taxonomy Table:    [ 13370 taxa by 7 taxonomic ranks ]
otu_scores_df <- matrix_scores %>%
  as.data.frame() %>%
  tibble::rownames_to_column("OTU")

vector_of_otus <- as.vector(otu_scores_df$OTU)

physeq <- physeq.otu %>%
  subset_taxa(., taxa_names(physeq.otu) %in% vector_of_otus)  

# Which OTU is missing?
setdiff(sort(vector_of_otus), sort(taxa_names(physeq)))
## character(0)
setdiff(sort(taxa_names(physeq)), sort(vector_of_otus))
## character(0)
######################################### FASTTREE
fast_tree <- read.tree(file="data/fasttree/fasttree_newick_tree_HNALNA_rmN.tre")

fast_tree_tip_order <- data.frame(fast_tree$tip.label) %>%
  rename(OTU = fast_tree.tip.label)


fasttree_physeq <- merge_phyloseq(physeq, phy_tree(fast_tree))

# Fix the taxonomy names 
colnames(tax_table(fasttree_physeq)) <- c("Kingdom","Phylum","Class","Order","Family","Genus","Species")

###################################################################### ADD THE PROTEOBACTERIA TO THE PHYLA
phy <- data.frame(tax_table(fasttree_physeq))
Phylum <- as.character(phy$Phylum)
Class <- as.character(phy$Class)

for  (i in 1:length(Phylum)){ 
  if (Phylum[i] == "Proteobacteria"){
    Phylum[i] <- Class[i]  } }

phy$Phylum <- Phylum # Add the new phylum level data back to phy

t <- tax_table(as.matrix(phy))
tax_table(fasttree_physeq) <- t

fasttree_tax <- data.frame(tax_table(fasttree_physeq)) %>%
  tibble::rownames_to_column(var = "OTU")

fasttree_df2 <- read.csv("data/fasttree/OTUnames_based_on_RLscores_MANUAL.csv") %>%
  left_join(., fasttree_tax, by = "OTU") %>%
  tibble::column_to_rownames("OTU") %>%
  dplyr::select(Phylum)


## Let's root the tree
is.rooted(fast_tree)
## [1] FALSE
test_tree <- root(fast_tree, outgroup = "Otu001533", resolve.root = TRUE)
is.rooted(test_tree)
## [1] TRUE
phyfcm_fasttree_df3 <- read.csv("data/fasttree/OTUnames_based_on_RLscores_MANUAL.csv") %>%
  left_join(., fasttree_tax, by = "OTU") %>%
  tibble::column_to_rownames("OTU") %>%
  dplyr::rename(FCM = fcm_type) %>%
  dplyr::select(Phylum, FCM)

rooted_tree <- 
  ggplot(test_tree, aes(x, y)) + geom_tree() + theme_tree() +
  geom_tiplab(size=3, align=TRUE, linesize=.5) #+
  #geom_nodelab(vjust=-.5, size=3) 

  
gheatmap(rooted_tree, phyfcm_fasttree_df3, offset = 0.1, width=0.3, font.size=3, colnames_angle=0, hjust=0.5) +
  scale_fill_manual(values = phylum_colors,  
                    breaks = c("Actinobacteria", "Alphaproteobacteria", "Bacteria_unclassified", "Bacteroidetes", "Betaproteobacteria",
                               "Cyanobacteria","Deltaproteobacteria", "Firmicutes", "Gammaproteobacteria","Omnitrophica", "Planctomycetes",
                               "Proteobacteria_unclassified", "unknown_unclassified", "Verrucomicrobia", "Both", "HNA", "LNA"))

Session Information

devtools::session_info() # This will include session info with all R package version information
##  setting  value                       
##  version  R version 3.5.0 (2018-04-23)
##  system   x86_64, darwin15.6.0        
##  ui       X11                         
##  language (EN)                        
##  collate  en_US.UTF-8                 
##  tz       America/Toronto             
##  date     2018-06-19                  
## 
##  package           * version  date       source        
##  ade4                1.7-11   2018-04-05 CRAN (R 3.5.0)
##  animation           2.5      2017-03-30 CRAN (R 3.5.0)
##  ape               * 5.1      2018-04-04 CRAN (R 3.5.0)
##  assertthat          0.2.0    2017-04-11 CRAN (R 3.5.0)
##  backports           1.1.2    2017-12-13 CRAN (R 3.5.0)
##  base              * 3.5.0    2018-04-24 local         
##  base64enc           0.1-3    2015-07-28 CRAN (R 3.5.0)
##  bindr               0.1.1    2018-03-13 CRAN (R 3.5.0)
##  bindrcpp          * 0.2.2    2018-03-29 CRAN (R 3.5.0)
##  Biobase             2.40.0   2018-05-01 Bioconductor  
##  BiocGenerics        0.26.0   2018-05-01 Bioconductor  
##  biomformat          1.8.0    2018-05-01 Bioconductor  
##  Biostrings          2.48.0   2018-05-01 Bioconductor  
##  bitops              1.0-6    2013-08-17 CRAN (R 3.5.0)
##  broom               0.4.4    2018-03-29 CRAN (R 3.5.0)
##  caTools             1.17.1   2014-09-10 CRAN (R 3.5.0)
##  cellranger          1.1.0    2016-07-27 CRAN (R 3.5.0)
##  cli                 1.0.0    2017-11-05 CRAN (R 3.5.0)
##  cluster             2.0.7-1  2018-04-13 CRAN (R 3.5.0)
##  clusterGeneration   1.3.4    2015-02-18 CRAN (R 3.5.0)
##  coda                0.19-1   2016-12-08 CRAN (R 3.5.0)
##  codetools           0.2-15   2016-10-05 CRAN (R 3.5.0)
##  colorspace          1.3-2    2016-12-14 CRAN (R 3.5.0)
##  combinat            0.0-8    2012-10-29 CRAN (R 3.5.0)
##  compiler            3.5.0    2018-04-24 local         
##  cowplot           * 0.9.2    2017-12-17 CRAN (R 3.5.0)
##  crayon              1.3.4    2017-09-16 CRAN (R 3.5.0)
##  crosstalk           1.0.0    2016-12-21 CRAN (R 3.5.0)
##  d3heatmap         * 0.6.1.2  2018-02-01 CRAN (R 3.5.0)
##  data.table          1.11.2   2018-05-08 CRAN (R 3.5.0)
##  datasets          * 3.5.0    2018-04-24 local         
##  devtools          * 1.13.5   2018-02-18 CRAN (R 3.5.0)
##  digest              0.6.15   2018-01-28 CRAN (R 3.5.0)
##  dplyr             * 0.7.5    2018-05-19 CRAN (R 3.5.0)
##  DT                * 0.4      2018-01-30 CRAN (R 3.5.0)
##  evaluate            0.10.1   2017-06-24 CRAN (R 3.5.0)
##  expm                0.999-2  2017-03-29 CRAN (R 3.5.0)
##  fastmatch           1.1-0    2017-01-28 CRAN (R 3.5.0)
##  forcats           * 0.3.0    2018-02-19 CRAN (R 3.5.0)
##  foreach             1.4.4    2017-12-12 CRAN (R 3.5.0)
##  foreign             0.8-70   2017-11-28 CRAN (R 3.5.0)
##  gdata               2.18.0   2017-06-06 CRAN (R 3.5.0)
##  ggplot2           * 2.2.1    2016-12-30 CRAN (R 3.5.0)
##  ggtree            * 1.12.0   2018-05-01 Bioconductor  
##  glue                1.2.0    2017-10-29 CRAN (R 3.5.0)
##  gplots            * 3.0.1    2016-03-30 CRAN (R 3.5.0)
##  graphics          * 3.5.0    2018-04-24 local         
##  grDevices         * 3.5.0    2018-04-24 local         
##  grid                3.5.0    2018-04-24 local         
##  gtable              0.2.0    2016-02-26 CRAN (R 3.5.0)
##  gtools              3.5.0    2015-05-29 CRAN (R 3.5.0)
##  haven               1.1.1    2018-01-18 CRAN (R 3.5.0)
##  hms                 0.4.2    2018-03-10 CRAN (R 3.5.0)
##  htmltools           0.3.6    2017-04-28 CRAN (R 3.5.0)
##  htmlwidgets         1.2      2018-04-19 CRAN (R 3.5.0)
##  httpuv              1.4.3    2018-05-10 CRAN (R 3.5.0)
##  httr                1.3.1    2017-08-20 CRAN (R 3.5.0)
##  igraph              1.2.1    2018-03-10 CRAN (R 3.5.0)
##  IRanges             2.14.10  2018-05-16 Bioconductor  
##  iterators           1.0.9    2017-12-12 CRAN (R 3.5.0)
##  jsonlite            1.5      2017-06-01 CRAN (R 3.5.0)
##  KernSmooth          2.23-15  2015-06-29 CRAN (R 3.5.0)
##  knitr               1.20     2018-02-20 CRAN (R 3.5.0)
##  labeling            0.3      2014-08-23 CRAN (R 3.5.0)
##  later               0.7.2    2018-05-01 CRAN (R 3.5.0)
##  lattice             0.20-35  2017-03-25 CRAN (R 3.5.0)
##  lazyeval            0.2.1    2017-10-29 CRAN (R 3.5.0)
##  lubridate           1.7.4    2018-04-11 CRAN (R 3.5.0)
##  magrittr            1.5      2014-11-22 CRAN (R 3.5.0)
##  maps              * 3.3.0    2018-04-03 CRAN (R 3.5.0)
##  MASS                7.3-50   2018-04-30 CRAN (R 3.5.0)
##  Matrix              1.2-14   2018-04-13 CRAN (R 3.5.0)
##  memoise             1.1.0    2017-04-21 CRAN (R 3.5.0)
##  methods           * 3.5.0    2018-04-24 local         
##  mgcv                1.8-23   2018-01-21 CRAN (R 3.5.0)
##  mime                0.5      2016-07-07 CRAN (R 3.5.0)
##  mnormt              1.5-5    2016-10-15 CRAN (R 3.5.0)
##  modelr              0.1.2    2018-05-11 CRAN (R 3.5.0)
##  msm                 1.6.6    2018-02-02 CRAN (R 3.5.0)
##  multtest            2.36.0   2018-05-01 Bioconductor  
##  munsell             0.4.3    2016-02-13 CRAN (R 3.5.0)
##  mvtnorm             1.0-7    2018-01-26 CRAN (R 3.5.0)
##  nlme                3.1-137  2018-04-07 CRAN (R 3.5.0)
##  numDeriv            2016.8-1 2016-08-27 CRAN (R 3.5.0)
##  parallel            3.5.0    2018-04-24 local         
##  permute             0.9-4    2016-09-09 CRAN (R 3.5.0)
##  phangorn            2.4.0    2018-02-15 CRAN (R 3.5.0)
##  phyloseq          * 1.24.0   2018-05-01 Bioconductor  
##  phytools          * 0.6-44   2017-11-12 CRAN (R 3.5.0)
##  pillar              1.2.2    2018-04-26 CRAN (R 3.5.0)
##  pkgconfig           2.0.1    2017-03-21 CRAN (R 3.5.0)
##  plotrix             3.7-1    2018-05-14 CRAN (R 3.5.0)
##  plyr                1.8.4    2016-06-08 CRAN (R 3.5.0)
##  png                 0.1-7    2013-12-03 CRAN (R 3.5.0)
##  promises            1.0.1    2018-04-13 CRAN (R 3.5.0)
##  psych               1.8.4    2018-05-06 CRAN (R 3.5.0)
##  purrr             * 0.2.4    2017-10-18 CRAN (R 3.5.0)
##  quadprog            1.5-5    2013-04-17 CRAN (R 3.5.0)
##  R6                  2.2.2    2017-06-17 CRAN (R 3.5.0)
##  Rcpp                0.12.17  2018-05-18 CRAN (R 3.5.0)
##  readr             * 1.1.1    2017-05-16 CRAN (R 3.5.0)
##  readxl              1.1.0    2018-04-20 CRAN (R 3.5.0)
##  reshape2            1.4.3    2017-12-11 CRAN (R 3.5.0)
##  rhdf5               2.24.0   2018-05-01 Bioconductor  
##  Rhdf5lib            1.2.1    2018-05-17 Bioconductor  
##  rlang               0.2.0    2018-02-20 CRAN (R 3.5.0)
##  rmarkdown           1.9      2018-03-01 CRAN (R 3.5.0)
##  rprojroot           1.3-2    2018-01-03 CRAN (R 3.5.0)
##  rstudioapi          0.7      2017-09-07 CRAN (R 3.5.0)
##  rvcheck             0.1.0    2018-05-23 CRAN (R 3.5.0)
##  rvest               0.3.2    2016-06-17 CRAN (R 3.5.0)
##  S4Vectors           0.18.2   2018-05-16 Bioconductor  
##  scales              0.5.0    2017-08-24 CRAN (R 3.5.0)
##  scatterplot3d       0.3-41   2018-03-14 CRAN (R 3.5.0)
##  shiny               1.1.0    2018-05-17 CRAN (R 3.5.0)
##  splines             3.5.0    2018-04-24 local         
##  stats             * 3.5.0    2018-04-24 local         
##  stats4              3.5.0    2018-04-24 local         
##  stringi             1.2.2    2018-05-02 CRAN (R 3.5.0)
##  stringr           * 1.3.1    2018-05-10 CRAN (R 3.5.0)
##  survival            2.42-3   2018-04-16 CRAN (R 3.5.0)
##  tibble            * 1.4.2    2018-01-22 CRAN (R 3.5.0)
##  tidyr             * 0.8.1    2018-05-18 CRAN (R 3.5.0)
##  tidyselect          0.2.4    2018-02-26 CRAN (R 3.5.0)
##  tidytree            0.1.8    2018-04-19 CRAN (R 3.5.0)
##  tidyverse         * 1.2.1    2017-11-14 CRAN (R 3.5.0)
##  tools               3.5.0    2018-04-24 local         
##  treeio              1.4.0    2018-05-01 Bioconductor  
##  utils             * 3.5.0    2018-04-24 local         
##  vegan               2.5-2    2018-05-17 CRAN (R 3.5.0)
##  withr               2.1.2    2018-03-15 CRAN (R 3.5.0)
##  xml2                1.2.0    2018-01-24 CRAN (R 3.5.0)
##  xtable              1.8-2    2016-02-05 CRAN (R 3.5.0)
##  XVector             0.20.0   2018-05-01 Bioconductor  
##  yaml                2.1.19   2018-05-01 CRAN (R 3.5.0)
##  zlibbioc            1.26.0   2018-05-01 Bioconductor